/*****************************************************************************************
  Project   : The Impact of Unions on Non-union Wage Setting: Threats and Bargaining
  Authors   : David A. Green, Ben M. Sand, Iain G. Snoddy, Jeanne Tschopp
  Date      : August 2025
  Output    : Creates the data for the decomposition used in 
			: Appendix Table 8
  Notes     : This file has options for:
			: demographic group `1', 
            : whether to create figures `2' (no longer in use, figures created in R, see master file)
			: transition case `3'
			: how many repetitions `4'
			: base year `5'
			: subgroup specific or dynamic (chained) ? `6'
*****************************************************************************************/



// Start from a clean session: empty memory, no -more- pauses, no open log.
clear
set more off
cap log close

/*

Decomposition after the new model, Nov 10th, 2023

*/

// Output folder is named after the sixth argument (coefficient set / subgroup).
global decomp decomp_`6'
// Path is quoted so a working directory containing spaces does not break mkdir.
// -cap- swallows the error raised when the folder already exists.
cap mkdir "${wd}/${decomp}"

// Input data for the requested demographic group (arg 1) and transition case (arg 3).
global Use_data "${wd}/decomp_out_${ind}_${area}_`1'_case_`3'.dta"

// Named copies of the positional arguments used further down.
local case `3'
local figures `2'
local reps `4'
local baseyr `5'
local sub `6'

// Area-level aggregation weights, saved as agg_wt.dta for the final
// collapse across areas.
use "$Use_data", clear

// Weights for aggregation:
// total reg_wgt of each area in period 1, copied onto every row of that area.
// Missing weights are counted as zero. The period variable is referenced by
// its full name (year2) rather than the abbreviation -year-.
	replace reg_wgt = 0 if reg_wgt == .
	egen temp = sum(reg_wgt) if year2 == 1 , by($area)
	egen wt = mean(temp), by($area) 
	drop temp
	
// One row per period and area.
collapse (mean) wt, by(year2 $area)

save "${wd}/${decomp}/agg_wt.dta", replace

//
// set up transitions data [only needs to be done once]
//

// Keep only the cell identifiers, then attach every transition record that
// matches on period, industry and union status.
use "$Use_data", clear
keep year2 $area $ind union


joinby year2 detind  union using "${wd}/ipums_ind_trans/transitions_`1'_case_`case'.dta", unmatched(both)
// unmatched(both) is requested only so the match rates can be tabulated;
// unmatched rows are discarded straight after.
tab _merge
keep if _merge==3
drop _merge

// get base year transition rates:
// mean phi_tt of each job pair in the base year, copied to all periods.
	egen tmp = mean(phi_tt) if year2 == `baseyr', by(job job_2)
	egen sphi_tt = mean(tmp), by(job job_2)
	// scratch variable is not needed downstream
	drop tmp

save "${wd}/${decomp}/temp_decomp_trans.dta", replace
	
//
// set up shares data
//

use "$Use_data", clear

// ----- > weights for city aggregation
// Total reg_wgt of each area in period 1, copied onto every row of the area.
// The period variable is written out in full (year2) instead of relying on
// the abbreviation -year-.

	replace reg_wgt = 0 if reg_wgt == .
	egen temp = sum(reg_wgt) if year2 == 1 , by($area)
	egen wt = mean(temp), by($area) 
	drop temp


// -----> Shares


	// base size: total weight of each industry-union cell in the base year
	egen tmp = sum(reg_wgt) if year2 == `baseyr', by($ind union)
	egen base = mean(tmp), by($ind union)
	drop tmp

	// size of the cell in each period relative to its base-year size
	// (set to zero when the ratio is undefined)
	egen tmp = sum(reg_wgt), by(year2 $ind union)
	gen rel = tmp / base
	replace rel = 0 if missing(rel)
	drop tmp

	// relative size of the non-union cell, copied to the union rows of the
	// same period and industry
	egen x = mean(rel) if union == 0, by(year2 $ind)
	egen xx = mean(x), by(year2 $ind)
	
	
	// base-year weight of each union cell within area and industry
	egen tmp = mean(reg_wgt) if union == 1 & year2 == `baseyr', by( ${area} $ind)
	egen tmp2 = mean(tmp) if union == 1, by(${area} $ind)

	// counterfactual cell weight: observed weight for non-union cells,
	// base-year weight scaled by the non-union relative size for union cells
	gen tmp3 = reg_wgt if union == 0
	replace tmp3 = tmp2 * xx if union == 1

	// job share within city
	egen sn_jc = sum(tmp3), by(year2 $area)
	replace sn_jc = tmp3 / sn_jc
	
	drop tmp*
	
// -----> union premium	


	// Declare union status as the "time" dimension within each
	// period-area-industry cell so that D.prem is the union row's prem
	// minus the non-union row's prem.
	egen id = group(year2 $area $ind)
	tsset id union
	gen uprem = D.prem

	// Average the premium over areas within period and industry and assign
	// it to every row of that period-industry group (year2 written in full
	// rather than abbreviated).
	egen tmp = mean(uprem), by(year2 $ind)
	replace uprem = tmp
	drop tmp
		
	// Base-year premium of each area-industry-union cell, copied to all periods.
	egen tmp = wtmean(uprem) if year2 == `baseyr', by($area $ind union ) 
	egen tmp2 = mean(tmp), by($area $ind union )
	gen uprem_0 = tmp2
	drop tmp*
	tsset, clear
	
	// Union wage with the current premium swapped for the base-year premium.
	// Union rows without a counterfactual wage are dropped from the observed
	// series as well.
	gen reswage_0 = reswage - uprem + uprem_0 if union == 1
	replace reswage = . if reswage_0 == . & union == 1
	
// ------> threat

// new at city-ind
	// Attach the union election variables by industry, area and period.
	// Rows that exist only in the election file are discarded.
	merge m:1 ${ind} ${area} year2 using "${wd}/keyvars/union_elections_${area}_${ind}.dta"
	tab _merge
	drop if _merge == 2
	drop _merge
	gen threat  = win_frac_ic_5 
	*drop win_*  cert*  frac_*

	// Base-year value of the threat variable for each area-industry-union
	// cell, copied to all periods.
	egen tmp = mean(threat) if year2 == `baseyr', by($area $ind union)
	egen threat_var = mean(tmp), by($area $ind union)
	drop tmp*
	
	// No observed threat where no base-year counterpart exists.
	replace threat = . if threat_var == .

	// Keep a period-industry-area group only when its non-union row has a
	// non-missing threat. Period 5 is always kept.
	// The period variable is written out in full (year2) rather than
	// abbreviated.
	bysort year2 detind $area: gen c = !missing(threat) if union == 0
	egen cc = mean(c), by(year2 detind $area )
	keep if cc == 1 | year2 ==5
	drop c cc
	
	// Full cell-level file: reloaded inside the simulation loop to receive
	// the simulated outside options.
	save "${wd}/${decomp}/temp_decomp_shares2.dta", replace
	
	// Slim copy for the simulation: only identifiers, shares and wages.
	// Industry and union status get the _2 suffix used as join keys against
	// the transitions data.
	keep year2 $area $ind union n_jc sn_jc reswage reswage_0 
	rename ($ind union) (detind_2 union_2)

	save "${wd}/${decomp}/temp_decomp_shares.dta", replace
	
// Build the simulation input: every transition record paired with the
// shares and wages of its detind_2 / union_2 cell in the same period and area.
use "${wd}/${decomp}/temp_decomp_trans.dta", clear

joinby year2 $area detind_2 union_2 using "${wd}/${decomp}/temp_decomp_shares.dta", unmatched(both)
// Report match rates, then retain matched pairs only.
tab _merge
drop if _merge != 3
drop _merge

save "${wd}/${decomp}/rep_three.dta", replace
	
	
// Simulation loop, run -reps- times.
// Each pass:
//   1. starts from the transition-pair file (rep_three.dta),
//   2. attaches the current wage series (observed wages on the first pass,
//      the series saved by the previous pass in rep_two.dta afterwards),
//   3. builds the outside-option terms E_* and collapses them to the
//      period-area-industry-union level (rep_one.dta),
//   4. merges them into the cell-level file and recomputes the observed and
//      counterfactual wage series reswage, Full, First, Second, Third,
//   5. on every pass but the last, saves those series to rep_two.dta.
// After the last pass the cell-level data stay in memory for aggregation.
forval i = 1/`reps' {

use "${wd}/${decomp}/rep_three.dta", clear

if `i' == 1 {
	// First pass: seed the counterfactual series from the data.
	// Full uses the base-year-premium wage for union cells.
	gen     Full = reswage_0 if union_2 == 1
	replace Full = reswage   if union_2 == 0
	// The other three series start at the observed wage for both sectors.
	gen First  = reswage if inlist(union_2, 0, 1)
	gen Second = reswage if inlist(union_2, 0, 1)
	gen Third  = reswage if inlist(union_2, 0, 1)
}
else {
	// Later passes: replace the wages with the series from the previous pass.
	drop reswage reswage_0
	joinby year2 $area detind_2 union_2 using "${wd}/${decomp}/rep_two.dta", unmatched(both)
	tab _merge
	keep if _merge==3
	drop _merge
}

replace reswage = . if missing(Full)

// Relative costs: observed transition rates and observed shares,
// normalised within job, area and period over cells with a wage
gen double temp = phi_tt * phi_ii * n_jc  if !missing(reswage)
egen temp2 = sum( temp ), by(job $area year2)
drop temp
gen double Xi =  (phi_tt * phi_ii) / temp2
drop temp2 


// Relative costs: base-year transition rates and counterfactual shares
gen double temp = sphi_tt * phi_ii * sn_jc  if !missing(Full)
egen temp2 = sum( temp ), by(job $area year2)
drop temp
gen double sXi =  (sphi_tt * phi_ii) / temp2 if !missing(Full)
drop temp2 

// outside options
gen double  E_c  = Xi * n_jc * reswage

// cf outside options
gen double E_full   = sXi * sn_jc * Full
gen double E_first  = sXi * sn_jc * First


drop Xi
// Relative costs, normalised over cells with a non-missing Second
gen double temp = phi_tt * phi_ii * n_jc  if !missing(Second)
egen temp2 = sum( temp ), by(job $area year2)
drop temp
gen double Xi =  (phi_tt * phi_ii) / temp2 if !missing(Second)
drop temp2 


gen double E_second = Xi * n_jc * Second

drop Xi
// Relative costs, normalised over cells with a non-missing Third
gen double temp = phi_tt * phi_ii * n_jc  if !missing(Third)
egen temp2 = sum( temp ), by(job $area year2)
drop temp
gen double Xi =  (phi_tt * phi_ii) / temp2  if !missing(Third)
drop temp2 
gen double E_third  = Xi * n_jc * Third

// Go back to job city year obs level
collapse (sum) E* , by(year2 $area detind union) fast

// A sum of zero is treated as missing
foreach var of varlist E*  {
	replace `var' = . if `var' == 0
}

// Save to merge back into original data 
*tempfile temp
save "${wd}/${decomp}/rep_one.dta", replace

*use  $Use_data, clear
use "${wd}/${decomp}/temp_decomp_shares2.dta", clear

merge 1:1 year2 $area $ind union using "${wd}/${decomp}/rep_one.dta"
tab _merge
drop _merge 


// retain union vars: copy the union row's E_* onto both rows of the cell
foreach var of varlist E_* {
	gen X = `var' if union == 1
	egen `var'u = mean(X), by(year2 $area $ind)
	drop X	
	}
	
// Inner repetition calculating


// Coefficients for Decomposition
// The sixth argument selects the coefficient pair. The branches form one
// mutually exclusive chain, and an unrecognised option stops the run here
// instead of failing later on a missing alpha0.

if "`sub'" == "dynamic" {
	// chain decomp with changing coefficients, one pair per base year
	if `baseyr' == 1 { 
		gen alpha0 = 0.833
		gen alpha1 = 0.926
	}
	else if `baseyr' == 2 { 
		gen alpha0 = -0.546
		gen alpha1 = -0.893
	}
	else if `baseyr' == 3 { 
		gen alpha0 = 0.827
		gen alpha1 = 0.805
	}
	else {
		display as error "decomposition: no dynamic coefficients defined for base year [`baseyr']"
		exit 198
	}
}
// decomps with sub group specific coefficients
else if "`sub'" == "Men" {
	gen alpha0 = 0.58
	gen alpha1 = 0.75
}
else if "`sub'" == "Women" {
	gen alpha0 = 0.67
	gen alpha1 = 0.75
}
else if "`sub'" == "EF_young_low" {
	gen alpha0 = 0.63
	gen alpha1 = 0.56
}
else if "`sub'" == "EF_young_high" {
	gen alpha0 = -0.03
	gen alpha1 = 0.06
}
else if "`sub'" == "EF_old_low" {
	gen alpha0 = 0.68
	gen alpha1 = 1.07
}
else if "`sub'" == "EF_old_high" {
	gen alpha0 = 0.36
	gen alpha1 = 0.50
}
// regular decomp, main coefficients
else if "`sub'" == "chain" {
	gen alpha0 = 0.647
	gen alpha1 = 0.77
}
else {
	display as error "decomposition: no coefficients defined for subgroup option [`sub']"
	exit 198
}

// Variables for Decomposition

// ------------------------------
// Full CF 
// CF wages [non-union]
// a is the fitted term at observed values, b the same term at counterfactual
// values; the counterfactual wage is the observed wage shifted by b - a.
gen a = alpha0 * E_c    + threat     * (alpha1 * E_cu    - alpha0 * E_c)    if union == 0
gen b = alpha0 * E_full + threat_var * (alpha1 * E_fullu - alpha0 * E_full) if union == 0
gen c = b - a if union == 0

gen Full = reswage + c if union == 0
drop b  c

// ------------------------------
// Threat
gen b = alpha0 * E_first + threat_var * (alpha1 * E_firstu - alpha0 * E_first) if union == 0
gen c = b - a if union == 0

gen First = reswage + c if union == 0

drop b c 

// ------------------------------
// union premia
gen b = alpha0 * E_second + threat_var * (alpha1 * E_secondu - alpha0 * E_second) if union == 0
gen c = b - a if union == 0

gen Second = reswage + c if union == 0

drop b c 
// ------------------------------
// transitions
gen b = alpha0 * E_third + threat_var * (alpha1 * E_thirdu - alpha0 * E_third) if union == 0
gen c = b - a if union == 0

gen Third = reswage + c if union == 0

drop a b c 

// ------------------------------
// Full CF 
// wages [union]
// The union rows use a single coefficient, which overwrites alpha1 for
// every subgroup option.
replace alpha1 = .414
gen a = alpha1 * E_c  if union == 1
gen b = alpha1 * E_full if union == 1
gen c = b - a if union == 1


// Full and First share the same adjustment c for union rows; Full starts
// from the base-year-premium wage, First from the observed wage.
replace Full   = reswage_0 + c if union == 1
replace First  = reswage +  c if union == 1

drop b  c
// a and b are missing for non-union rows, so c is missing there as well
gen b = alpha1 * E_second if union == 1
gen c = b - a

replace Second = reswage + c if union == 1

drop b c 
gen b = alpha1 * E_third if union == 1
gen c = b - a

replace Third = reswage + c if union == 1

drop a b c 

replace reswage = . if missing(Full) 

if `i' < `reps' {
	// Hand the updated wage series to the next pass. Only these variables
	// are needed; the period key is named in full because the next pass
	// joins on year2.
	keep year2 ${area} $ind union reswage Full First Second Third

	rename $ind detind_2
	rename union union_2
	
	save "${wd}/${decomp}/rep_two.dta", replace 
	}
	
	
}


// city-year level
// Works on the cell-level data left in memory by the last simulation pass.
	
	// Weighted mean wage by period, area and union status. Full and First
	// use the counterfactual shares (sn_jc), the others the observed
	// shares (n_jc).
	egen W_obs    = wtmean(reswage) , by(year2 ${area} union) weight(n_jc)
	egen W_full   = wtmean(Full)    , by(year2 ${area} union) weight(sn_jc)
	egen W_first  = wtmean(First)   , by(year2 ${area} union) weight(sn_jc)
	egen W_second = wtmean(Second)  , by(year2 ${area} union) weight(n_jc)
	egen W_third  = wtmean(Third)   , by(year2 ${area} union) weight(n_jc)

*egen tmp = wtmean(union) if year2 == 1, by(year2 ${area}) weight(n_jc)
*egen union_share80 = mean(tmp), by(${area})
*drop tmp

// Share of union cells within period and area, weighted by n_jc
egen union_share = wtmean(union), by(year2 ${area}) weight(n_jc)

// One row per period, area and union status. The period key is named in
// full (year2) because the merge below matches on year2.
collapse (mean) union_sh* W_* , by(year2 $area union ) fast

// reshape: non-union values get suffix 0, union values suffix 1
reshape wide W_* union_share*  , i( year2  ${area} ) j(union) 
// union_share is identical for both sectors, so one copy is enough
drop union_share1 //union_share801
rename union_share0 union_share
*rename union_share800 union_share80

tempfile temp
save `temp', replace

// Attach the area weights and keep areas present in both files
use "${wd}/${decomp}/agg_wt.dta", clear

sort year2 $area
merge 1:1 year2 $area using  `temp'
drop if _merge != 3

// collapse: weighted average across areas, one row per period
	collapse (mean) W_* union_sh* [aw = wt], by(year2)
	tsset year2, yearly	

// Decomposition on the period-level data (one row per year2).
// Subscript [1] refers to the first row of the data, i.e. the earliest
// period after the collapse by year2.

// breakdown non-union
	// full obs - full CF
	gen double Full0   = W_full0   - W_obs0
	
	gen double Premia0  = W_full0   - W_first0
	gen double Trans0   = W_first0  - W_second0
	gen double Threat0  = W_second0 - W_obs0 
	
	gen double Spill0   = W_full0   - W_second0 // ie, Premia + trans
	
//////////////////////////////////////////////
// New decomp for the revision
//////////////////////////////////////////////


// Overall observed wage trends: change in the union-share-weighted mean wage
// relative to the first period. By construction observed = part1 + part2 + cf2.
gen observed = W_obs0 * (1 - union_share  ) + W_obs1 * union_share - (W_obs0[1] * (1 - union_share[1]) + W_obs1[1] * union_share[1])

// part due to changing union proportion
gen part1   = (union_share - union_share[1]) * (W_obs1 - W_obs0)

// part due to changing wage differentials
gen part2   = union_share[1] * (W_obs1 - W_obs0 - (W_obs1[1] - W_obs0[1]))

// CF 2 - non-union wage trends (ie, holding union proportion and differential constant)
gen cf2   = W_obs0 - W_obs0[1] 

// CF 1 - holding union proportion constant.
gen cf1 = part2 + cf2

// Decomposing CF 2 into de-union effect and CF 3 (cf2 = deunion + cf3)

// Deunion	
gen deunion  =  W_obs0 - W_full0

// CF 3 - full counterfactual
gen cf3      =  W_full0 - W_obs0[1]

// full decomp of composition effect
// (deunion = cf_threat + cf_transition + cf_premia)

gen cf_threat = W_obs0 - W_second0
gen cf_transition = W_second0 -  W_first0
gen cf_premia =  W_first0 - W_full0

// Save output (path quoted so a working directory with spaces is handled)
save "${wd}/${decomp}/decomp_grp_`1'_case_`3'_chain_`baseyr'.dta", replace
















